7 /****************************
8 * implementation dependent *
9 ****************************/
11 /* template for workfiles (8.3 format) */
12 #define FNAME "_sort%03d.dat"
15 /* comparison operators */
16 #define compLT(x,y) (x < y)
17 #define compGT(x,y) (x > y)
19 /* define the record to be sorted here */
22 typedef struct recTypeTag
{
23 keyType key
; /* sort key for record */
25 char data
[LRECL
-sizeof(keyType
)]; /* other fields */
29 /******************************
30 * implementation independent *
31 ******************************/
33 typedef enum {false, true} bool;
35 typedef struct tmpFileTag
{
36 FILE *fp
; /* file pointer */
37 char name
[LNAME
]; /* filename */
38 recType rec
; /* last record read */
39 int dummy
; /* number of dummy runs */
40 bool eof
; /* end-of-file flag */
41 bool eor
; /* end-of-run flag */
42 bool valid
; /* true if rec is valid */
43 int fib
; /* ideal fibonacci number */
46 static tmpFileType
**file
; /* array of file info for tmp files */
47 static int nTmpFiles
; /* number of tmp files */
48 static char *ifName
; /* input filename */
49 static char *ofName
; /* output filename */
51 static int level
; /* level of runs */
52 static int nNodes
; /* number of nodes for selection tree */
54 void deleteTmpFiles(void) {
57 /* delete merge files and free resources */
59 for (i
= 0; i
< nTmpFiles
; i
++) {
61 if (file
[i
]->fp
) fclose(file
[i
]->fp
);
62 if (*file
[i
]->name
) remove(file
[i
]->name
);
70 void termTmpFiles(int rc
) {
77 /* file[T] contains results */
78 fileT
= nTmpFiles
- 1;
79 fclose(file
[fileT
]->fp
); file
[fileT
]->fp
= NULL
;
80 if (rename(file
[fileT
]->name
, ofName
)) {
85 *file
[fileT
]->name
= 0;
90 void cleanExit(int rc
) {
92 /* cleanup tmp files and exit */
97 void *safeMalloc(size_t size
) {
100 /* safely allocate memory and initialize to zero */
101 if ((p
= calloc(1, size
)) == NULL
) {
102 printf("error: malloc failed, size = %d\n", size
);
108 void initTmpFiles(void) {
110 tmpFileType
*fileInfo
;
112 /* initialize merge files */
113 if (nTmpFiles
< 3) nTmpFiles
= 3;
114 file
= safeMalloc(nTmpFiles
* sizeof(tmpFileType
*));
115 fileInfo
= safeMalloc(nTmpFiles
* sizeof(tmpFileType
));
116 for (i
= 0; i
< nTmpFiles
; i
++) {
117 file
[i
] = fileInfo
+ i
;
118 sprintf(file
[i
]->name
, FNAME
, i
);
119 if ((file
[i
]->fp
= fopen(file
[i
]->name
, "w+b")) == NULL
) {
126 recType
*readRec(void) {
128 typedef struct iNodeTag
{ /* internal node */
129 struct iNodeTag
*parent
;/* parent of internal node */
130 struct eNodeTag
*loser
; /* external loser */
133 typedef struct eNodeTag
{ /* external node */
134 struct iNodeTag
*parent
;/* parent of external node */
135 recType rec
; /* input record */
136 int run
; /* run number */
137 bool valid
; /* input record is valid */
140 typedef struct nodeTag
{
141 iNodeType i
; /* internal node */
142 eNodeType e
; /* external node */
145 static nodeType
*node
; /* array of selection tree nodes */
146 static eNodeType
*win
; /* new winner */
147 static FILE *ifp
; /* input file */
148 static bool eof
; /* true if end-of-file, input */
149 static int maxRun
; /* maximum run number */
150 static int curRun
; /* current run number */
151 iNodeType
*p
; /* pointer to internal nodes */
152 static bool lastKeyValid
; /* true if lastKey is valid */
153 static keyType lastKey
; /* last key written */
155 /* read next record using replacement selection */
157 /* check for first call */
161 if (nNodes
< 2) nNodes
= 2;
162 node
= safeMalloc(nNodes
* sizeof(nodeType
));
163 for (i
= 0; i
< nNodes
; i
++) {
164 node
[i
].i
.loser
= &node
[i
].e
;
165 node
[i
].i
.parent
= &node
[i
/2].i
;
166 node
[i
].e
.parent
= &node
[(nNodes
+ i
)/2].i
;
168 node
[i
].e
.valid
= false;
171 lastKeyValid
= false;
173 if ((ifp
= fopen(ifName
, "rb")) == NULL
) {
174 printf("error: file %s, unable to open\n", ifName
);
181 /* replace previous winner with new record */
183 if (fread(&win
->rec
, sizeof(recType
), 1, ifp
) == 1) {
184 if ((!lastKeyValid
|| compLT(win
->rec
.key
, lastKey
))
185 && (++win
->run
> maxRun
))
188 } else if (feof(ifp
)) {
192 win
->run
= maxRun
+ 1;
199 win
->run
= maxRun
+ 1;
202 /* adjust loser and winner pointers */
207 if (p
->loser
->run
< win
->run
) {
209 } else if (p
->loser
->run
== win
->run
) {
210 if (p
->loser
->valid
&& win
->valid
) {
211 if (compLT(p
->loser
->rec
.key
, win
->rec
.key
))
218 /* p should be winner */
226 } while (p
!= &node
[0].i
);
229 if (win
->run
!= curRun
) {
230 /* win->run = curRun + 1 */
231 if (win
->run
> maxRun
) {
239 /* output top of tree */
241 lastKey
= win
->rec
.key
;
248 void makeRuns(void) {
249 recType
*win
; /* winner */
250 int fileT
; /* last file */
251 int fileP
; /* next to last file */
252 int j
; /* selects file[j] */
255 /* Make initial runs using replacement selection.
256 * Runs are written using a Fibonacci distintbution.
259 /* initialize file structures */
260 fileT
= nTmpFiles
- 1;
262 for (j
= 0; j
< fileT
; j
++) {
266 file
[fileT
]->fib
= 0;
267 file
[fileT
]->dummy
= 0;
278 for (j
= 0; win
&& j
<= fileP
; j
++) {
282 if (file
[j
]->valid
) {
283 if (!compLT(win
->key
, file
[j
]->rec
.key
)) {
284 /* append to an existing run */
286 } else if (file
[j
]->dummy
) {
287 /* start a new run */
292 /* first run in file */
302 if (fwrite(win
, sizeof(recType
), 1, file
[j
]->fp
) != 1) {
306 file
[j
]->rec
.key
= win
->key
;
307 file
[j
]->valid
= true;
308 if ((win
= readRec()) == NULL
) break;
309 if (compLT(win
->key
, file
[j
]->rec
.key
)) break;
314 /* if no room for runs, up a level */
319 for (j
= 0; j
<= fileP
; j
++) {
320 file
[j
]->dummy
= t
+ file
[j
+1]->fib
- file
[j
]->fib
;
321 file
[j
]->fib
= t
+ file
[j
+1]->fib
;
327 void rewindFile(int j
) {
328 /* rewind file[j] and read in first record */
329 file
[j
]->eor
= false;
330 file
[j
]->eof
= false;
332 if (fread(&file
[j
]->rec
, sizeof(recType
), 1, file
[j
]->fp
) != 1) {
333 if (feof(file
[j
]->fp
)) {
343 void mergeSort(void) {
349 /* polyphase merge sort */
351 fileT
= nTmpFiles
- 1;
354 /* prime the files */
355 for (j
= 0; j
< fileT
; j
++) {
359 /* each pass through loop merges one run */
368 for (j
= 0; j
<= fileP
; j
++) {
369 if (!file
[j
]->dummy
) {
371 if (!file
[j
]->eof
) anyRuns
= true;
379 /* merge 1 run file[0]..file[P] --> file[T] */
382 /* each pass thru loop writes 1 record to file[fileT] */
384 /* find smallest key */
386 for (j
= 0; j
<= fileP
; j
++) {
387 if (file
[j
]->eor
) continue;
388 if (file
[j
]->dummy
) continue;
390 (k
!= j
&& compGT(file
[k
]->rec
.key
, file
[j
]->rec
.key
)))
395 /* write record[k] to file[fileT] */
396 if (fwrite(&file
[k
]->rec
, sizeof(recType
), 1,
397 file
[fileT
]->fp
) != 1) {
402 /* replace record[k] */
403 lastKey
= file
[k
]->rec
.key
;
404 if (fread(&file
[k
]->rec
, sizeof(recType
), 1,
406 /* check for end of run on file[s] */
407 if (compLT(file
[k
]->rec
.key
, lastKey
))
409 } else if (feof(file
[k
]->fp
)) {
419 for (j
= 0; j
<= fileP
; j
++) {
420 if (file
[j
]->dummy
) file
[j
]->dummy
--;
421 if (!file
[j
]->eof
) file
[j
]->eor
= false;
424 } else if (allDummies
) {
425 for (j
= 0; j
<= fileP
; j
++)
427 file
[fileT
]->dummy
++;
431 if (file
[fileP
]->eof
&& !file
[fileP
]->dummy
) {
432 /* completed a fibonocci-level */
435 /* we're done, file[fileT] contains data */
439 /* fileP is exhausted, reopen as new */
440 fclose(file
[fileP
]->fp
);
441 if ((file
[fileP
]->fp
= fopen(file
[fileP
]->name
, "w+b"))
446 file
[fileP
]->eof
= false;
447 file
[fileP
]->eor
= false;
451 /* f[0],f[1]...,f[fileT] <-- f[fileT],f[0]...,f[T-1] */
453 memmove(file
+ 1, file
, fileT
* sizeof(tmpFileType
*));
457 for (j
= 0; j
<= fileP
; j
++)
458 if (!file
[j
]->eof
) file
[j
]->eor
= false;
473 int main(int argc
, char *argv
[]) {
477 * ext ifName ofName nTmpFiles nNodes
479 * ext in.dat out.dat 5 2000
480 * reads in.dat, sorts using 5 files and 2000 nodes, output to out.dat
483 printf("%s ifName ofName nTmpFiles nNodes\n", argv
[0]);
489 nTmpFiles
= atoi(argv
[3]);
490 nNodes
= atoi(argv
[4]);
492 printf("extSort: nFiles=%d, nNodes=%d, lrecl=%d\n",
493 nTmpFiles
, nNodes
, sizeof(recType
));